#!python
#cython: wraparound=False, nonecheck=False, boundscheck=False, cdivision=True

import numpy as np
cimport numpy as np

__all__ = []

np.import_array()


cdef extern from "numpy/random/distributions.h":
    # Generate random numbers in closed interval [off, off + rng].
    uint64_t random_bounded_uint64(bitgen_t *bitgen_state,
                                   uint64_t off, uint64_t rng,
                                   uint64_t mask, bint use_masked) nogil
    uint32_t random_buffered_bounded_uint32(bitgen_t *bitgen_state,
                                            uint32_t off, uint32_t rng,
                                            uint32_t mask, bint use_masked,
                                            int *bcnt, uint32_t *buf) nogil
    uint16_t random_buffered_bounded_uint16(bitgen_t *bitgen_state,
                                            uint16_t off, uint16_t rng,
                                            uint16_t mask, bint use_masked,
                                            int *bcnt, uint32_t *buf) nogil
    uint8_t random_buffered_bounded_uint8(bitgen_t *bitgen_state,
                                          uint8_t off, uint8_t rng,
                                          uint8_t mask, bint use_masked,
                                          int *bcnt, uint32_t *buf) nogil
    np.npy_bool random_buffered_bounded_bool(bitgen_t *bitgen_state,
                                             np.npy_bool off, np.npy_bool rng,
                                             np.npy_bool mask, bint use_masked,
                                             int *bcnt, uint32_t *buf) nogil
    void random_bounded_uint64_fill(bitgen_t *bitgen_state,
                                    uint64_t off, uint64_t rng, np.npy_intp cnt,
                                    bint use_masked,
                                    uint64_t *out) nogil
    void random_bounded_uint32_fill(bitgen_t *bitgen_state,
                                    uint32_t off, uint32_t rng, np.npy_intp cnt,
                                    bint use_masked,
                                    uint32_t *out) nogil
    void random_bounded_uint16_fill(bitgen_t *bitgen_state,
                                    uint16_t off, uint16_t rng, np.npy_intp cnt,
                                    bint use_masked,
                                    uint16_t *out) nogil
    void random_bounded_uint8_fill(bitgen_t *bitgen_state,
                                   uint8_t off, uint8_t rng, np.npy_intp cnt,
                                   bint use_masked,
                                   uint8_t *out) nogil
    void random_bounded_bool_fill(bitgen_t *bitgen_state,
                                  np.npy_bool off, np.npy_bool rng, np.npy_intp cnt,
                                  bint use_masked,
                                  np.npy_bool *out) nogil


cdef object format_bounds_error(bint closed, object low):
    # Special case low == 0 to provide a better exception for users
    # since low = 0 is the default single-argument case.
    if not np.any(low):
        comp = '<' if closed else '<='
        return f'high {comp} 0'
    else:
        comp = '>' if closed else '>='
        return f'low {comp} high'


{{
py:
type_info = (('uint32', 'uint32', 'uint64', 'NPY_UINT64', 0, 0, 0, '0X100000000ULL'),
          ('uint16', 'uint16', 'uint32', 'NPY_UINT32', 1, 16, 0, '0X10000UL'),
          ('uint8', 'uint8', 'uint16', 'NPY_UINT16', 3, 8, 0, '0X100UL'),
          ('bool','bool', 'uint8', 'NPY_UINT8', 31, 1, 0, '0x2UL'),
          ('int32', 'uint32', 'uint64', 'NPY_INT64', 0, 0, '-0x80000000LL', '0x80000000LL'),
          ('int16', 'uint16', 'uint32', 'NPY_INT32', 1, 16, '-0x8000LL', '0x8000LL' ),
          ('int8', 'uint8', 'uint16', 'NPY_INT16', 3, 8, '-0x80LL', '0x80LL' ),
)}}
{{for  nptype, utype, nptype_up, npctype, remaining, bitshift, lb, ub in type_info}}
{{ py: otype = nptype + '_' if nptype == 'bool' else nptype }}
cdef object _rand_{{nptype}}_broadcast(np.ndarray low, np.ndarray high, object size,
                                       bint use_masked, bint closed,
                                       bitgen_t *state, object lock):
    """
    Array path for smaller integer types

    This path is simpler since the high value in the open interval [low, high)
    must be in-range for the next larger type, {{nptype_up}}. Here we case to
    this type for checking and the recast to {{nptype}} when producing the
    random integers.
    """
    cdef {{utype}}_t rng, last_rng, off, val, mask, out_val, is_open
    cdef uint32_t buf
    cdef {{utype}}_t *out_data
    cdef {{nptype_up}}_t low_v, high_v
    cdef np.ndarray low_arr, high_arr, out_arr
    cdef np.npy_intp i, cnt
    cdef np.broadcast it
    cdef int buf_rem = 0

    # Array path
    is_open = not closed
    low_arr = <np.ndarray>low
    high_arr = <np.ndarray>high

    if np.can_cast(low_arr, np.{{otype}}):
        pass  # cannot be out-of-bounds
    elif np.any(np.less(low_arr, np.{{otype}}({{lb}}))):
        raise ValueError('low is out of bounds for {{nptype}}')

    if closed:
        high_comp = np.greater_equal
        low_high_comp = np.greater
    else:
        high_comp = np.greater
        low_high_comp = np.greater_equal

    if np.can_cast(high_arr, np.{{otype}}):
        pass  # cannot be out-of-bounds
    elif np.any(high_comp(high_arr, np.{{nptype_up}}({{ub}}))):
        raise ValueError('high is out of bounds for {{nptype}}')

    if np.any(low_high_comp(low_arr, high_arr)):
        raise ValueError(format_bounds_error(closed, low_arr))

    low_arr = <np.ndarray>np.PyArray_FROM_OTF(low, np.{{npctype}}, np.NPY_ALIGNED | np.NPY_FORCECAST)
    high_arr = <np.ndarray>np.PyArray_FROM_OTF(high, np.{{npctype}}, np.NPY_ALIGNED | np.NPY_FORCECAST)

    if size is not None:
        out_arr = <np.ndarray>np.empty(size, np.{{otype}})
    else:
        it = np.PyArray_MultiIterNew2(low_arr, high_arr)
        out_arr = <np.ndarray>np.empty(it.shape, np.{{otype}})

    it = np.PyArray_MultiIterNew3(low_arr, high_arr, out_arr)
    out_data = <{{utype}}_t *>np.PyArray_DATA(out_arr)
    cnt = np.PyArray_SIZE(out_arr)
    mask = last_rng = 0
    with lock, nogil:
        for i in range(cnt):
            low_v = (<{{nptype_up}}_t*>np.PyArray_MultiIter_DATA(it, 0))[0]
            high_v = (<{{nptype_up}}_t*>np.PyArray_MultiIter_DATA(it, 1))[0]
            # Subtract 1 since generator produces values on the closed int [off, off+rng]
            rng = <{{utype}}_t>((high_v - is_open) - low_v)
            off = <{{utype}}_t>(<{{nptype_up}}_t>low_v)

            if rng != last_rng:
                # Smallest bit mask >= max
                mask = <{{utype}}_t>_gen_mask(rng)

            out_data[i] = random_buffered_bounded_{{utype}}(state, off, rng, mask, use_masked, &buf_rem, &buf)

            np.PyArray_MultiIter_NEXT(it)
    return out_arr
{{endfor}}
{{
py:
big_type_info = (('uint64', 'uint64', 'NPY_UINT64', '0x0ULL', '0xFFFFFFFFFFFFFFFFULL'),
                 ('int64', 'uint64', 'NPY_INT64', '-0x8000000000000000LL', '0x7FFFFFFFFFFFFFFFLL' )
)}}
{{for  nptype, utype, npctype, lb, ub in big_type_info}}
{{ py: otype = nptype}}
cdef object _rand_{{nptype}}_broadcast(object low, object high, object size,
                                       bint use_masked, bint closed,
                                       bitgen_t *state, object lock):
    """
    Array path for 64-bit integer types

    Requires special treatment since the high value can be out-of-range for
    the largest (64 bit) integer type since the generator is specified on the
    interval [low,high).

    The internal generator does not have this issue since it generates from
    the closes interval [low, high-1] and high-1 is always in range for the
    64 bit integer type.
    """

    cdef np.ndarray low_arr, low_arr_orig, high_arr, high_arr_orig, out_arr
    cdef np.npy_intp i, cnt, n
    cdef np.broadcast it
    cdef object closed_upper
    cdef uint64_t *out_data
    cdef {{nptype}}_t *highm1_data
    cdef {{nptype}}_t low_v, high_v
    cdef uint64_t rng, last_rng, val, mask, off, out_val, is_open

    low_arr_orig = <np.ndarray>low
    high_arr_orig = <np.ndarray>high

    is_open = not closed

    # The following code tries to cast safely, but failing that goes via
    # Python `int()` because it is very difficult to cast integers in a
    # truly safe way (i.e. so it raises on out-of-bound).
    # We correct if the interval is not closed in this step if we go the long
    # route.  (Not otherwise, since the -1 could overflow in theory.)
    if np.can_cast(low_arr_orig, np.{{otype}}):
        low_arr = <np.ndarray>np.PyArray_FROM_OTF(low_arr_orig, np.{{npctype}}, np.NPY_ALIGNED)
    else:
        low_arr = <np.ndarray>np.empty_like(low_arr_orig, dtype=np.{{otype}})
        flat = low_arr_orig.flat
        low_data = <{{nptype}}_t *>np.PyArray_DATA(low_arr)
        cnt = np.PyArray_SIZE(low_arr)
        for i in range(cnt):
            lower = int(flat[i])
            if lower < {{lb}} or lower > {{ub}}:
                raise ValueError('low is out of bounds for {{nptype}}')
            low_data[i] = lower

    del low_arr_orig

    if np.can_cast(high_arr_orig, np.{{otype}}):
        high_arr = <np.ndarray>np.PyArray_FROM_OTF(high_arr_orig, np.{{npctype}}, np.NPY_ALIGNED)
    else:
        high_arr = np.empty_like(high_arr_orig, dtype=np.{{otype}})
        flat = high_arr_orig.flat
        high_data = <{{nptype}}_t *>np.PyArray_DATA(high_arr)
        cnt = np.PyArray_SIZE(high_arr)
        for i in range(cnt):
            closed_upper = int(flat[i]) - is_open
            if closed_upper > {{ub}}:
                raise ValueError('high is out of bounds for {{nptype}}')
            if closed_upper < {{lb}}:
                raise ValueError(format_bounds_error(closed, low_arr))
            high_data[i] = closed_upper

        is_open = 0  # we applied is_open in this path already

    del high_arr_orig

    # Since we have the largest supported integer dtypes, they must be within
    # range at this point; otherwise conversion would have failed.  Check that
    # it is never true that `high <= low`` if closed and `high < low` if not
    if not is_open:
        low_high_comp = np.greater
    else:
        low_high_comp = np.greater_equal

    if np.any(low_high_comp(low_arr, high_arr)):
        raise ValueError(format_bounds_error(closed, low_arr))

    if size is not None:
        out_arr = <np.ndarray>np.empty(size, np.{{otype}})
    else:
        it = np.PyArray_MultiIterNew2(low_arr, high_arr)
        out_arr = <np.ndarray>np.empty(it.shape, np.{{otype}})

    it = np.PyArray_MultiIterNew3(low_arr, high_arr, out_arr)
    out_data = <uint64_t *>np.PyArray_DATA(out_arr)
    n = np.PyArray_SIZE(out_arr)
    mask = last_rng = 0
    with lock, nogil:
        for i in range(n):
            low_v = (<{{nptype}}_t*>np.PyArray_MultiIter_DATA(it, 0))[0]
            high_v = (<{{nptype}}_t*>np.PyArray_MultiIter_DATA(it, 1))[0]
            # Generator produces values on the closed int [off, off+rng]
            rng = <{{utype}}_t>((high_v - is_open) - low_v)
            off = <{{utype}}_t>(<{{nptype}}_t>low_v)

            if rng != last_rng:
                mask = _gen_mask(rng)
            out_data[i] = random_bounded_uint64(state, off, rng, mask, use_masked)

            np.PyArray_MultiIter_NEXT(it)

    return out_arr
{{endfor}}
{{
py:
type_info = (('uint64', 'uint64', '0x0ULL', '0xFFFFFFFFFFFFFFFFULL'),
             ('uint32', 'uint32', '0x0UL', '0XFFFFFFFFUL'),
             ('uint16', 'uint16', '0x0UL', '0XFFFFUL'),
             ('uint8', 'uint8', '0x0UL', '0XFFUL'),
             ('bool', 'bool', '0x0UL', '0x1UL'),
             ('int64', 'uint64', '-0x8000000000000000LL', '0x7FFFFFFFFFFFFFFFL'),
             ('int32', 'uint32', '-0x80000000L', '0x7FFFFFFFL'),
             ('int16', 'uint16', '-0x8000L', '0x7FFFL' ),
             ('int8', 'uint8', '-0x80L', '0x7FL' )
)}}
{{for  nptype, utype, lb, ub in type_info}}
{{ py: otype = nptype + '_' if nptype == 'bool' else nptype }}
cdef object _rand_{{nptype}}(object low, object high, object size,
                             bint use_masked, bint closed,
                             bitgen_t *state, object lock):
    """
    _rand_{{nptype}}(low, high, size, use_masked, *state, lock)

    Return random `np.{{otype}}` integers from `low` (inclusive) to `high` (exclusive).

    Return random integers from the "discrete uniform" distribution in the
    interval [`low`, `high`).  If `high` is None (the default),
    then results are from [0, `low`). On entry the arguments are presumed
    to have been validated for size and order for the `np.{{otype}}` type.

    Parameters
    ----------
    low : int or array-like
        Lowest (signed) integer to be drawn from the distribution (unless
        ``high=None``, in which case this parameter is the *highest* such
        integer).
    high : int or array-like
        If provided, one above the largest (signed) integer to be drawn from the
        distribution (see above for behavior if ``high=None``).
    size : int or tuple of ints
        Output shape.  If the given shape is, e.g., ``(m, n, k)``, then
        ``m * n * k`` samples are drawn.  Default is None, in which case a
        single value is returned.
    use_masked : bool
        If True then rejection sampling with a range mask is used else Lemire's algorithm is used.
    closed : bool
        If True then sample from [low, high].  If False, sample [low, high)
    state : bit generator
        Bit generator state to use in the core random number generators
    lock : threading.Lock
        Lock to prevent multiple using a single generator simultaneously

    Returns
    -------
    out : python scalar or ndarray of np.{{otype}}
          `size`-shaped array of random integers from the appropriate
          distribution, or a single such random int if `size` not provided.

    Notes
    -----
    The internal integer generator produces values from the closed
    interval [low, high-(not closed)].  This requires some care since
    high can be out-of-range for {{utype}}. The scalar path leaves
    integers as Python integers until the 1 has been subtracted to
    avoid needing to cast to a larger type.
    """
    cdef np.ndarray out_arr, low_arr, high_arr
    cdef {{utype}}_t rng, off, out_val
    cdef {{utype}}_t *out_data
    cdef np.npy_intp i, n, cnt

    if size is not None:
        if (np.prod(size) == 0):
            return np.empty(size, dtype=np.{{otype}})

    low_arr = <np.ndarray>np.asarray(low)
    high_arr = <np.ndarray>np.asarray(high)
    low_ndim = np.PyArray_NDIM(low_arr)
    high_ndim = np.PyArray_NDIM(high_arr)
    if low_ndim == 0 and high_ndim == 0:
        low = int(low_arr)
        high = int(high_arr)
        # Subtract 1 since internal generator produces on closed interval [low, high]
        if not closed:
            high -= 1

        if low < {{lb}}:
            raise ValueError("low is out of bounds for {{nptype}}")
        if high > {{ub}}:
            raise ValueError("high is out of bounds for {{nptype}}")
        if low > high:  # -1 already subtracted, closed interval
            raise ValueError(format_bounds_error(closed, low))

        rng = <{{utype}}_t>(high - low)
        off = <{{utype}}_t>(<{{nptype}}_t>low)
        if size is None:
            with lock:
                random_bounded_{{utype}}_fill(state, off, rng, 1, use_masked, &out_val)
            return np.{{otype}}(<{{nptype}}_t>out_val)
        else:
            out_arr = <np.ndarray>np.empty(size, np.{{otype}})
            cnt = np.PyArray_SIZE(out_arr)
            out_data = <{{utype}}_t *>np.PyArray_DATA(out_arr)
            with lock, nogil:
                random_bounded_{{utype}}_fill(state, off, rng, cnt, use_masked, out_data)
            return out_arr
    return _rand_{{nptype}}_broadcast(low_arr, high_arr, size, use_masked, closed, state, lock)
{{endfor}}
